#load packages
library(tidyverse)
── Attaching core tidyverse packages ───────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2 ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (http://conflicted.r-lib.org/) to force all conflicts to become errors
library(stringr)
library(broom)
library(lme4)
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(afex)
************
Welcome to afex. For support visit: http://afex.singmann.science/
- Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
- Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
- 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
- Get and set global package options with: afex_options()
- Set sum-to-zero contrasts globally: set_sum_contrasts()
- For example analyses see: browseVignettes("afex")
************
Attaching package: ‘afex’
The following object is masked from ‘package:lme4’:
lmer
library(sjPlot)
library(effects)
Loading required package: carData
lattice theme set by effectsTheme()
See ?effectsTheme for details.
library(sjstats)
Attaching package: ‘sjstats’
The following object is masked from ‘package:psych’:
phi
The following object is masked from ‘package:broom’:
bootstrap
library(ngramr)
library(Rmisc) #for SummarySE
Loading required package: lattice
Loading required package: plyr
-----------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
-----------------------------------------------------------------------
Attaching package: ‘plyr’
The following objects are masked from ‘package:dplyr’:
arrange, count, desc, failwith, id, mutate, rename,
summarise, summarize
The following object is masked from ‘package:purrr’:
compact
library(plyr)
library(dplyr)
library(rstatix)
Attaching package: ‘rstatix’
The following objects are masked from ‘package:plyr’:
desc, mutate
The following object is masked from ‘package:sjstats’:
t_test
The following object is masked from ‘package:stats’:
filter
library(emmeans)
library(MuMIn)
library(lmerTest)
library(effectsize)
Attaching package: ‘effectsize’
The following objects are masked from ‘package:rstatix’:
cohens_d, eta_squared
The following objects are masked from ‘package:sjstats’:
cohens_f, cramers_v, phi
The following object is masked from ‘package:psych’:
phi
library(forecast)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
This is forecast 8.23.0
Use suppressPackageStartupMessages() to eliminate package startup messages.
library(ggiraph)
library(packcircles)
library(viridis)
Loading required package: viridisLite
library(htmlwidgets)
library(jtools)
Attaching package: ‘jtools’
The following object is masked from ‘package:effectsize’:
standardize
Load and clean data
#read csv
# World survey responses (60 rows x 92 cols per the read_csv report below);
# includes difficulty-to-learn ratings for eight grammatical structures.
ws_012425 <- read_csv('world_survey_difficult_teach_learn_012425.csv')
New names:
Rows: 60 Columns: 92
── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (55): Recorded Date, Lang, setting, setting1, country, country1, ...
dbl (29): years_teaching_ES, difficult_learn_ser_estar, difficult_lea...
lgl (8): UCD_affiliation1, UCD_classes1, UCD_classes_prior1, gender1...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#view(ws_012425)
#difficult to learn
#pivot to show
# Reshape the eight difficult_learn_* rating columns to long format, attach
# readable structure labels, and flip the rating scale.
ws_012425_diffl <- ws_012425 |>
  dplyr::select(L1_Romance, 35:42) |>
  pivot_longer(
    cols = !L1_Romance,  # everything except L1_Romance = the 8 rating columns
    names_to = "Grammatical Structure",
    values_to = "Difficulty"
  ) |>
  dplyr::mutate(
    gram_struc = case_when(
      `Grammatical Structure` == 'difficult_learn_ser_estar' ~ 'ser and estar',
      `Grammatical Structure` == 'difficult_learn_grammatical_gender_agreement' ~ 'grammatical gender agreement',
      `Grammatical Structure` == 'difficult_learn_pret_imp' ~ 'preterit and imperfect',
      `Grammatical Structure` == 'difficult_learn_subjunctive' ~ 'subjunctive',
      `Grammatical Structure` == 'difficult_learn_por_para' ~ 'por and para',
      `Grammatical Structure` == 'difficult_learn_gustar' ~ 'gustar and gustar-like verbs',
      `Grammatical Structure` == 'difficult_learn_ir' ~ 'ir a + infinitive',
      `Grammatical Structure` == 'difficult_learn_irreg' ~ 'irregular verbs in the present'
    )
  ) |>
  drop_na(Difficulty) |>
  # Ratings were coded 3 = extremely easy ... -3 = extremely difficult, so
  # negate them so that larger values mean more difficult.
  dplyr::mutate(Difficulty1 = -Difficulty)
#view(ws_012425_diffl)
#let's narrow it down to just a few
# Keep only the five structures discussed below. %in% replaces the original
# chain of == comparisons joined with | — same rows kept, clearer intent.
ws_012425_diffl_4 <- ws_012425_diffl |>
  dplyr::filter(gram_struc %in% c(
    'grammatical gender agreement',
    'irregular verbs in the present',
    'subjunctive',
    'gustar and gustar-like verbs',
    'preterit and imperfect'
  ))
#view(ws_012425_diffl_4)
Difficult to Learn
#boxplots
# Perceived learning difficulty per grammatical structure; individual
# responses are overlaid as open-circle jitter on top of the boxes.
ggplot(ws_012425_diffl_4,
       aes(x = gram_struc, y = Difficulty1, color = gram_struc)) +
  geom_boxplot() +
  geom_jitter(shape = 1) +
  labs(x = "Grammatical Structure", y = "Difficulty") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous(limits = c(-4, 4)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.80, vjust = 0.9),
        text = element_text(size = 13))
#pret
# CEDEL2 preterit token frequencies (verb, L2_freq, NS_freq plus log10
# transforms; 300 rows x 13 cols per the read_csv report below).
pret_token_freq_log <- read_csv('CEDEL2_pret_token_freq_log_22.csv')
Rows: 300 Columns: 13── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (1): verb
dbl (12): L2_freq, NS_freq, L2_freq_log, NS_freq_log, L2_freq_log_reg...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#imp
# CEDEL2 imperfect token frequencies (129 rows x 9 cols per the read_csv
# report below), same column layout as the preterit file.
imp_token_freq_log <- read_csv('CEDEL2_imp_token_freq_log_22.csv')
Rows: 129 Columns: 9── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (1): verb
dbl (8): L2_freq, NS_freq, L2_freq_log, NS_freq_log, L2_freq_log_reg,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#distribution- need non-parametric test
# The (commented) histograms showed skewed distributions, motivating a
# rank-based correlation rather than Pearson's r.
# hist(pret_token_freq_log$NS_freq_log)
# hist(pret_token_freq_log$L2_freq_log)
# hist(imp_token_freq_log$NS_freq_log)
# hist(imp_token_freq_log$L2_freq_log)
#spearman rank correlation
#preterit
# Spearman correlation between native (NS) and learner (L2) log frequencies
# for preterit verbs; ties make the p-value approximate (see warning below).
cor.test(pret_token_freq_log$NS_freq_log, pret_token_freq_log$L2_freq_log, method = "spearman")
Warning: Cannot compute exact p-value with ties
Spearman's rank correlation rho
data: pret_token_freq_log$NS_freq_log and pret_token_freq_log$L2_freq_log
S = 2071667, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.5396244
#imperfect
# Same Spearman rank correlation for imperfect verbs.
cor.test(imp_token_freq_log$NS_freq_log, imp_token_freq_log$L2_freq_log, method = "spearman")
Warning: Cannot compute exact p-value with ties
Spearman's rank correlation rho
data: imp_token_freq_log$NS_freq_log and imp_token_freq_log$L2_freq_log
S = 144765, p-value = 9.978e-14
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.5953563
Preterit
#preterit
#log10
# Scatter of native (x) vs. learner (y) log10 token frequencies, preterit.
ggplot(pret_token_freq_log, aes(x = NS_freq_log, y = L2_freq_log)) +
  geom_point() +
  labs(
    x = "Log Transformed Frequency in L1 Corpus",
    y = "Log Transformed Frequency in L2 Corpus"
  ) +
  scale_x_continuous(limits = c(0, 3)) +
  scale_y_continuous(limits = c(0, 3))
Imperfect
#imperfect
#log10
# Scatter of native (x) vs. learner (y) log10 token frequencies, imperfect.
ggplot(imp_token_freq_log, aes(x = NS_freq_log, y = L2_freq_log)) +
  geom_point() +
  labs(
    x = "Log Transformed Frequency in L1 Corpus",
    y = "Log Transformed Frequency in L2 Corpus"
  ) +
  scale_x_continuous(limits = c(0, 3)) +
  scale_y_continuous(limits = c(0, 3))
##run the program: Coll.analysis
#source("http://www.stgries.info/teaching/groningen/coll.analysis.r")
#used -log10 (Fisher-Yates exact, one-tailed) (= default) option
#preterit
# Distinctive collexeme analysis output for preterit-attracted verbs
# (words, preferred occurrence, collostruction strength; 31 rows).
DCA_CEDEL2_ap <- read_csv('DCA_CEDEL2_ap_22.csv')
Rows: 31 Columns: 6── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (3): words, pref.occur, text
dbl (3): freq_imp, freq_pret, coll.strength
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#imperfect
# DCA output for imperfect-attracted verbs (13 rows, same columns).
DCA_CEDEL2_ai <- read_csv('DCA_CEDEL2_ai_22.csv')
Rows: 13 Columns: 6── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (3): words, pref.occur, text
dbl (3): freq_imp, freq_pret, coll.strength
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Preterit results
# Circle-packing layout: one circle per verb, area sized by its
# collostruction strength (preterit results).
packing <- circleProgressiveLayout(DCA_CEDEL2_ap$coll.strength, sizetype='area')
#view(packing)
# NOTE(review): `data` shadows utils::data() — a more specific name would
# be safer.
data <- cbind(DCA_CEDEL2_ap, packing)
#view(data)
# Polygon vertices approximating each circle (50 points per circle).
dat.gg <- circleLayoutVertices(packing, npoints=50)
#view(dat.gg)
# Make the plot with a few differences compared to the static version:
# Tooltips are pulled from data$text indexed by each polygon's circle id.
p <- ggplot() +
geom_polygon_interactive(data = dat.gg, aes(x, y, group = id, fill=id, tooltip = data$text[id], data_id = id), colour = "black", alpha = 0.6) +
scale_fill_viridis() +
geom_text(data = data, aes(x, y, label = gsub("Group_", "", words)), size=4, color="black") +
theme_void() +
theme(legend.position="none", plot.margin=unit(c(0,0,0,0),"cm") ) +
coord_equal()
# Turn it interactive
girafe(ggobj = p, width_svg = 7, height_svg = 7)
Imperfect results
# Generate the layout
# Same circle-packing visualization as above, for the imperfect results.
packing1 <- circleProgressiveLayout(DCA_CEDEL2_ai$coll.strength, sizetype='area')
data1 <- cbind(DCA_CEDEL2_ai, packing1)
dat.gg1 <- circleLayoutVertices(packing1, npoints=50)
# Make the plot with a few differences compared to the static version:
p1 <- ggplot() +
geom_polygon_interactive(data = dat.gg1, aes(x, y, group = id, fill=id, tooltip = data1$text[id], data_id = id), colour = "black", alpha = 0.6) +
scale_fill_viridis() +
geom_text(data = data1, aes(x, y, label = gsub("Group_", "", words)), size=3.5, color="black") +
theme_void() +
theme(legend.position="none", plot.margin=unit(c(0,0,0,0),"cm") ) +
coord_equal()
# Turn it interactive
girafe(ggobj = p1, width_svg = 7, height_svg = 7)
All courses
#read csv
# Per-essay normalized past-form counts by quarter/modality/course/measure
# (140 rows x 7 cols per the read_csv report below).
csv18_new_marking_sandbox_pi <- read_csv( 'csv18_new_marking_sandbox_pi_22.csv')
Rows: 140 Columns: 7── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (4): quarter, modality, course, Measure
dbl (3): id, Word count, normalized_count_per_100_tokens
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#boxplot
# Normalized past-form use (per 100 words) across course levels, split by
# tense-aspect measure; jitter shows individual essays.
ggplot(csv18_new_marking_sandbox_pi,
       aes(x = course, y = normalized_count_per_100_tokens, color = Measure)) +
  geom_boxplot() +
  geom_jitter(shape = 1, width = 0.3) +
  labs(x = "Course Level",
       y = "Use of Past Form per 100 Words",
       color = "Tense-Aspect") +
  scale_color_brewer(palette = "Set1") +
  theme(text = element_text(size = 13))
Just post-instruction
#boxplot just SPA 2
# Restrict to the SPA 2 course level, then redraw the same boxplot.
csv18_new_marking_sandbox_pi_2 <- csv18_new_marking_sandbox_pi |>
  dplyr::filter(course == 'SPA 2')
#graph
ggplot(csv18_new_marking_sandbox_pi_2,
       aes(x = course, y = normalized_count_per_100_tokens, color = Measure)) +
  geom_boxplot() +
  geom_jitter(shape = 1, width = 0.3) +
  labs(x = "Course Level",
       y = "Use of Past Form per 100 Words",
       color = "Tense-Aspect") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous(limits = c(0, 15)) +
  theme(text = element_text(size = 13))
# Percent use of each tense-aspect form per course level (long format,
# 28 rows x 3 cols per the read_csv report below).
csv18_new_percents_p <- read_csv('csv18_new_percents_p_24.csv')
Rows: 28 Columns: 3── Column specification ─────────────────────────────────────────────────
Delimiter: ","
chr (2): Course level, Tense-aspect form
dbl (1): Percent use
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stacked bar chart: percent use of each tense-aspect form by course level.
# geom_col() is the modern equivalent of geom_bar(stat = "identity").
ggplot(csv18_new_percents_p,
       aes(x = `Course level`, y = `Percent use`, fill = `Tense-aspect form`)) +
  geom_col(position = "stack") +
  #facet_wrap(~n_person1)+
  theme_minimal() +
  #labs(x = "Course level", y = "Percent use of forms in preterit-appropriate contexts")+
  theme(axis.text.x = element_text(angle = 45, hjust = 0.80, vjust = 0.9),
        text = element_text(size = 20)) +
  scale_fill_manual(values = c("#669C99", "#9999CC", "#66CCFF", "#CC6666"))
Imperfect
#cohen's d
# Effect size for the course-level (Proficiency) accuracy difference.
# NOTE(review): data_cows_SPA12 is not created anywhere in this script —
# confirm it is loaded elsewhere before rerunning. The warning below says
# the outcome is 0/1 numeric; converting it to a factor would silence it.
# This resolves to effectsize::cohens_d (loaded after rstatix, see masking
# messages above).
cohens_d(appropriateness ~ Proficiency, data = data_cows_SPA12)
Warning: 'y' is numeric but has only 2 unique values.
If this is a grouping variable, convert it to a factor.
Cohen's d | 95% CI
--------------------------
-1.42 | [-1.63, -1.21]
- Estimated using pooled SD.
Effect plot
#here, I'm trying to create effect plots for this first course level effect
#let's take a look at the dataset
#view(data_cows_SPA12)
#let's plot accuracy by course level
# NOTE(review): modelrq3_1 is not fit in this script — presumably defined
# in another file/session; verify before rerunning. Per the jtools message
# below, the CIs reflect fixed-effects variance only.
jtools::effect_plot(modelrq3_1, pred = Proficiency,
x.label = 'Course Level',
y.label = 'Accuracy'
)+
ylim(0, 1.0)
Confidence intervals for merMod models is an experimental feature.
The intervals reflect only the variance of the fixed effects, not
the random effects.
# Post-instruction accuracy data (1764 rows x 33 cols per the report below).
data_rq1_post_instruction <- read_csv('data_rq1_post_instruction_24.csv')
Rows: 1764 Columns: 33── Column specification ───────────────────────────────────────────────────────────
Delimiter: ","
chr (21): Subset, Rater, Essay, Verb List.x, Infinitive.x, Form, Corrected Form...
dbl (12): Number, Appropriate, Ambiguous, Inappropriate, log_frq, Spreadsheet, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#now, we'll be considering tense (binary), regularity (binary), and frequency (log-transformed continuous)
#tense and regularity will become +/- 0.5
#now let's center and scale the frequency data
# z-score a numeric vector: subtract the mean, divide by the SD.
# Returns a one-column matrix carrying scaled:center / scaled:scale
# attributes, exactly as base scale() does.
center_scale <- function(x) {
  scale(x, center = TRUE, scale = TRUE)
}
#this is from this blog https://www.gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/
#need to center the course levels
#SPA 2 = -2.5
#SPA 3 = -1.5
#SPA 21 = -.5
#SPA 22 = .5
#SPA 23 = 1.5
#SPA 24 = 2.5
#turn Proficiency levels into centered variable
# Build the model-ready predictors:
#  * course_level       descriptive label per Proficiency level
#  * course_level_cont  centered numeric course level (-2.5 .. +2.5)
#  * tense, regularity  sum-coded +/-0.5 contrasts
#  * frequency          centered + scaled log frequency (via center_scale)
data_rq1_post_instruction <- data_rq1_post_instruction %>%
  mutate(course_level = case_when(
    Proficiency == 'SPA 2' ~ 'level 2',
    Proficiency == 'SPA 3' ~ 'level 3',
    Proficiency == 'SPA 21' ~ 'level 4',
    Proficiency == 'SPA 22' ~ 'level 5',
    Proficiency == 'SPA 23' ~ 'level 6',
    Proficiency == 'SPA 24' ~ 'level 7'
  )) %>%
  mutate(course_level_cont = case_when(
    Proficiency == 'SPA 2' ~ -2.5,
    Proficiency == 'SPA 3' ~ -1.5,
    Proficiency == 'SPA 21' ~ -0.5,
    Proficiency == 'SPA 22' ~ 0.5,
    Proficiency == 'SPA 23' ~ 1.5,
    Proficiency == 'SPA 24' ~ 2.5
  )) %>%
  mutate(tense = case_when(
    Corrected_num == 'imperfect' ~ -0.5, #imperfect negative
    Corrected_num == 'preterit' ~ 0.5 #preterit positive
  )) %>%
  mutate(regularity = case_when(
    Regularity == 'I' ~ -0.5, #irregular is -0.5
    Regularity == 'R' ~ 0.5 #regular is 0.5 (positive)
  )) %>%
  mutate(
    #change 11-11-22: frequency as sum_log_frq
    # FIX: refer to the pipeline column, not the global data frame
    # (was center_scale(data_rq1_post_instruction$log_sum_frq)). The global
    # reference only works because no rows are dropped earlier in this
    # chain; it would silently misalign if a filter were ever added above.
    frequency = center_scale(log_sum_frq)
    #frequency now ranges from approx. -2.7 to +1.53
  ) %>%
  drop_na(regularity) %>% #getting rid of NAs, still 1801 observations
  mutate(
    infinitive = tolower(Infinitive_no_reflex)
  )
#just the preterit
# Use dplyr::filter explicitly: plyr and rstatix are attached after dplyr
# in this script and mask several verbs (see startup messages), so a bare
# filter() is ambiguous here; the rest of the file already qualifies it.
data_rq1_post_instruction_pret <- data_rq1_post_instruction %>%
  dplyr::filter(Corrected_num == 'preterit')
#view(data_rq1_post_instruction_pret)
#just the imperfect
data_rq1_post_instruction_imp <- data_rq1_post_instruction %>%
  dplyr::filter(Corrected_num == 'imperfect')
#view(data_rq1_post_instruction_imp)
# Overall accuracy model: tense (Corrected_num), frequency, and regularity
# as fixed effects; uncorrelated (||) by-essay slopes plus by-verb
# (infinitive) random intercepts.
# NOTE(review): the fit warns "nearly unidentifiable: large eigenvalue
# ratio" (below) — rescaling predictors may help. Also, || does not fully
# decorrelate terms involving the factor Corrected_num: the summary below
# still shows a -0.21 correlation between the two tense slopes.
model21_092923 <- glmer(appropriateness ~ Corrected_num + frequency + regularity +
(1+ Corrected_num + frequency + regularity||Essay) + (1|infinitive), data_rq1_post_instruction,
family = 'binomial', control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=50000)))
Warning: Model is nearly unidentifiable: large eigenvalue ratio
- Rescale variables?
# Model summary (output below): preterit and frequency effects are
# significant; regularity is not.
summary(model21_092923)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [
glmerMod]
Family: binomial ( logit )
Formula: appropriateness ~ Corrected_num + frequency + regularity + (1 +
Corrected_num + frequency + regularity || Essay) + (1 | infinitive)
Data: data_rq1_post_instruction
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 50000))
AIC BIC logLik deviance df.resid
1331.9 1392.1 -655.0 1309.9 1753
Scaled residuals:
Min 1Q Median 3Q Max
-6.1650 0.1388 0.2376 0.3516 1.7647
Random effects:
Groups Name Variance Std.Dev. Corr
infinitive (Intercept) 0.36056 0.6005
Essay regularity 0.26180 0.5117
Essay.1 frequency 0.09777 0.3127
Essay.2 Corrected_numimperfect 1.30934 1.1443
Corrected_numpreterit 1.08541 1.0418 -0.21
Essay.3 (Intercept) 1.09286 1.0454
Number of obs: 1764, groups: infinitive, 246; Essay, 60
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.1183 0.3059 3.656 0.000256 ***
Corrected_numpreterit 1.7601 0.3470 5.073 3.92e-07 ***
frequency 0.4082 0.1388 2.942 0.003266 **
regularity 0.4891 0.3091 1.582 0.113569
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) Crrct_ frqncy
Crrctd_nmpr -0.615
frequency 0.039 0.176
regularity -0.360 0.306 0.337
optimizer (bobyqa) convergence code: 0 (OK)
Model is nearly unidentifiable: large eigenvalue ratio
- Rescale variables?
#for tense
# Predicted accuracy by tense-aspect from model21_092923, with CIs
# (fixed-effects variance only, per the jtools message below).
effect_plot(model21_092923, pred = Corrected_num,
x.label = 'Tense-aspect',
y.label = 'Accuracy',
pred.labels = c('Imperfect', 'Preterit'),
interval = TRUE)+
ylim(0, 1.0)
Confidence intervals for merMod models is an experimental feature. The
intervals reflect only the variance of the fixed effects, not the random
effects.
#for tense
# NOTE(review): this chunk is an exact duplicate of the effect_plot call
# immediately above — likely left in by accident; consider removing one.
effect_plot(model21_092923, pred = Corrected_num,
x.label = 'Tense-aspect',
y.label = 'Accuracy',
pred.labels = c('Imperfect', 'Preterit'),
interval = TRUE)+
ylim(0, 1.0)
Preterit
# Preterit-only model: accuracy ~ frequency with uncorrelated by-essay
# frequency slopes and by-verb intercepts. Frequency is not significant
# for the preterit (summary below).
model3p <- glmer(appropriateness ~ frequency +
(1+ frequency||Essay) + (1|infinitive), data_rq1_post_instruction_pret,
family = 'binomial', control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=50000)))
summary(model3p)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [
glmerMod]
Family: binomial ( logit )
Formula: appropriateness ~ frequency + (1 + frequency || Essay) + (1 |
infinitive)
Data: data_rq1_post_instruction_pret
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 50000))
AIC BIC logLik deviance df.resid
617.1 641.8 -303.5 607.1 1032
Scaled residuals:
Min 1Q Median 3Q Max
-5.4198 0.1374 0.2126 0.2899 1.2052
Random effects:
Groups Name Variance Std.Dev.
infinitive (Intercept) 0.6037 0.7770
Essay frequency 0.0282 0.1679
Essay.1 (Intercept) 2.2406 1.4969
Number of obs: 1037, groups: infinitive, 213; Essay, 60
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.9066 0.3168 9.174 <2e-16 ***
frequency 0.1082 0.1769 0.612 0.541
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr)
frequency 0.249
#r2
# Marginal (fixed-effects) and conditional (fixed+random) pseudo-R2
# for the preterit frequency model (MuMIn).
r.squaredGLMM(model3p)
Warning: the null model is correct only if all variables used by the original model remain unchanged.
R2m R2c
theoretical 0.001771195 0.4670640
delta 0.000831001 0.2191349
Imperfect
# Imperfect-only model: accuracy ~ frequency; frequency IS significant
# here (summary below), unlike for the preterit.
# NOTE(review): the name model3 is reused for the group-comparison model
# further below, so later model3 references depend on execution order —
# consider a distinct name (e.g. model3i).
model3 <- glmer(appropriateness ~ frequency +
(1+ frequency||Essay) + (1|infinitive), data_rq1_post_instruction_imp,
family = 'binomial', control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=50000)))
summary(model3)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [
glmerMod]
Family: binomial ( logit )
Formula: appropriateness ~ frequency + (1 + frequency || Essay) + (1 |
infinitive)
Data: data_rq1_post_instruction_imp
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 50000))
AIC BIC logLik deviance df.resid
712.0 734.9 -351.0 702.0 722
Scaled residuals:
Min 1Q Median 3Q Max
-6.2212 -0.3672 0.2906 0.4663 2.1492
Random effects:
Groups Name Variance Std.Dev.
infinitive (Intercept) 0.3608 0.6007
Essay frequency 0.1874 0.4329
Essay.1 (Intercept) 2.5675 1.6024
Number of obs: 727, groups: infinitive, 91; Essay, 57
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.1437 0.2890 3.957 7.6e-05 ***
frequency 0.4279 0.1829 2.340 0.0193 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr)
frequency 0.188
#r2
# Pseudo-R2 for the imperfect frequency model.
r.squaredGLMM(model3)
Warning: the null model is correct only if all variables used by the original model remain unchanged.
R2m R2c
theoretical 0.02473641 0.4992493
delta 0.02082311 0.4202680
#for frequency
# Predicted accuracy by (scaled) log frequency — preterit model (model3p).
effect_plot(model3p, pred = frequency,
x.label = 'Frequency (log-transformed)',
y.label = 'Accuracy',
interval = TRUE)+
ylim(0, 1.0)
Confidence intervals for merMod models is an experimental feature. The
intervals reflect only the variance of the fixed effects, not the random
effects.
#for frequency
# Predicted accuracy by (scaled) log frequency — imperfect model (model3,
# as fit in the preceding chunk).
effect_plot(model3, pred = frequency,
x.label = 'Frequency (log-transformed)',
y.label = 'Accuracy',
interval = TRUE)+
ylim(0, 1.0)
Confidence intervals for merMod models is an experimental feature. The
intervals reflect only the variance of the fixed effects, not the random
effects.
# Two-sample Cohen's d comparing Appropriate between the intervention (_i)
# and control (_c) preterit datasets.
# NOTE(review): csv18_clean_cleanest3_past_pret_i / _c are not created in
# this script — confirm they are loaded elsewhere before rerunning.
cohens_d(csv18_clean_cleanest3_past_pret_i$Appropriate,
csv18_clean_cleanest3_past_pret_c$Appropriate)
Warning: 'y' is numeric but has only 2 unique values.
If this is a grouping variable, convert it to a factor.
Cohen's d | 95% CI
-------------------------
0.04 | [-0.13, 0.21]
- Estimated using pooled SD.
#Accuracy by Group
# NOTE(review): model2 is not fit in this script — verify it exists in the
# session before rerunning this chunk.
jtools::effect_plot(model2, pred = group,
x.label = 'Group',
y.label = 'Accuracy'
)+
ylim(0, 1.0)+
theme(text = element_text(size = 13))
Confidence intervals for merMod models is an experimental feature. The
intervals reflect only the variance of the fixed effects, not the random
effects.
# Imperfect-context accuracy data (225 rows x 15 cols per the report below).
csv18_clean_cleanest3_past_imp <- read_csv('csv18_clean_cleanest3_past_imp.csv')
Rows: 225 Columns: 15── Column specification ───────────────────────────────────────────────────────────
Delimiter: ","
chr (9): Verb List, Infinitive, Form, Corrected Form, level, group, ID, Correct...
dbl (6): Number, Appropriate, Ambiguous, Inappropriate, Form_num, Corrected_num
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Group (control vs intervention) effect on imperfect accuracy, with
# by-participant (ID) random intercepts. No group effect (summary below).
# NOTE(review): this OVERWRITES the earlier frequency model also named
# model3 — rename one of them to avoid order-dependent results.
model3 <- glmer(Appropriate ~ group +
(1|ID),
csv18_clean_cleanest3_past_imp,
family = 'binomial',
control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=50000)))
summary(model3)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [
glmerMod]
Family: binomial ( logit )
Formula: Appropriate ~ group + (1 | ID)
Data: csv18_clean_cleanest3_past_imp
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 50000))
AIC BIC logLik deviance df.resid
292.8 303.0 -143.4 286.8 222
Scaled residuals:
Min 1Q Median 3Q Max
-2.3890 -0.8445 0.4250 0.6583 1.8292
Random effects:
Groups Name Variance Std.Dev.
ID (Intercept) 1.234 1.111
Number of obs: 225, groups: ID, 42
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.583739 0.383848 1.521 0.128
groupintervention -0.005685 0.494504 -0.011 0.991
Correlation of Fixed Effects:
(Intr)
gropntrvntn -0.744
# Pseudo-R2 for the group model; R2m ~ 0 (below) confirms no group effect.
r.squaredGLMM(model3)
Warning: the null model is correct only if all variables used by the original model remain unchanged.
R2m R2c
theoretical 1.767995e-06 0.2728413
delta 1.468262e-06 0.2265857
#no group effect
#Accuracy by Group
# Predicted accuracy by group from the group model (model3 as redefined
# just above).
jtools::effect_plot(model3, pred = group,
x.label = 'Group',
y.label = 'Accuracy'
)+
ylim(0, 1.0)+
theme(text = element_text(size = 13))
Confidence intervals for merMod models is an experimental feature. The
intervals reflect only the variance of the fixed effects, not the random
effects.
#function for scaling
# Min-max normalise a numeric vector onto [0, 1].
# Generalized with an na.rm argument; the default FALSE preserves the
# original behavior (any NA in x makes the whole result NA). A constant
# vector still yields NaN (0/0), as before.
minmaxnormalise <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
#import data
# Exam scores (429 rows x 5 cols per the report below) plus candidate
# normality transforms: square, cube, and auto-lambda Box-Cox, with the
# Box-Cox version z-scored for modeling. Group is relabeled for plotting.
# NOTE(review): scale() returns a one-column matrix, so score_bx_scale is
# a matrix column inside the tibble — fine for lmer, but worth knowing.
Imp1stUG_012325 <- read_csv('Imp1stUG_012325.csv')|>
dplyr::mutate(
score_sqr = (score)^2,
score_cube = (score)^3,
score_boxcox = forecast::BoxCox(score, lambda = "auto"),
#center and scale
score_bx_scale = scale(score_boxcox, center = TRUE, scale = TRUE),
Group1 = case_when(
Group == 'PFG'~ 'control',
Group == 'IFG'~ 'intervention'
)
)
Rows: 429 Columns: 5── Column specification ───────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Term, Group, Section, ID
dbl (1): score
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#view(Imp1stUG_012325)
#visualization of data
# Quick normality check of the Box-Cox-scaled scores.
hist(Imp1stUG_012325$score_bx_scale)
#roughly normal
#boxplots
# NOTE(review): x maps the relabeled Group1 (control/intervention) but
# color maps the original Group (IFG/PFG), so the legend labels differ
# from the axis labels — color = Group1 was probably intended; confirm.
ggplot(Imp1stUG_012325, aes(x=Group1,
y=score
,
color = Group
)) +
geom_boxplot()+
labs(x = "Group", y = "Score")+
geom_jitter(shape=1, width = 0.3)+
scale_color_brewer(palette="Set1")+
theme(text = element_text(size = 13))+
ylim(0, 40)
#what lambda?
# Print the transformed values; the auto-selected lambda is attached as an
# attribute (~1.9999, i.e. essentially a square transform — output below).
forecast::BoxCox(Imp1stUG_012325$score, lambda = "auto")
[1] 7.499463 97.484633 391.418692 199.463434 127.478683 449.404392 127.478683
[8] 161.471628 241.454071 449.404392 241.454071 449.404392 127.478683 449.404392
[15] 127.478683 241.454071 449.404392 337.431724 287.443509 199.463434 647.353683
[22] 161.471628 161.471628 337.431724 577.371905 577.371905 337.431724 577.371905
[29] 241.454071 127.478683 127.478683 49.493382 71.489517 127.478683 337.431724
[36] 527.509701 594.492475 560.501253 721.334118 464.525616 449.404392 23.997388
[43] 611.862961 241.454071 299.565679 189.590591 241.454071 479.896759 511.388802
[50] 449.404392 209.586204 543.880518 230.581523 543.880518 311.937771 377.547068
[57] 594.492475 275.571263 640.175565 49.493382 230.581523 638.387284 721.334118
[64] 511.388802 560.501253 560.501253 540.586362 161.471628 717.540128 570.593654
[71] 647.353683 717.540128 540.586362 573.977782 479.896759 135.602024 441.937499
[78] 385.840052 372.068397 638.387284 540.586362 638.387284 702.464136 479.896759
[85] 495.517822 241.454071 143.975296 419.911702 287.443509 143.975296 127.478683
[92] 419.911702 683.844069 419.911702 337.431724 143.975296 252.576544 287.443509
[99] 83.987206 363.925365 71.489517 405.540237 337.431724 219.958901 287.443509
[106] 241.454071 449.404392 161.471628 241.454071 449.404392 511.388802 337.431724
[113] 241.454071 161.471628 161.471628 511.388802 17.498279 511.388802 449.404392
[120] 337.431724 337.431724 287.443509 391.418692 449.404392 337.431724 127.478683
[127] 419.911702 665.473918 577.371905 495.517822 1.499953 527.509701 311.937771
[134] 511.388802 189.590591 287.443509 241.454071 350.553584 464.525616 449.404392
[141] 647.353683 629.483364 419.911702 23.997388 391.418692 391.418692 337.431724
[148] 363.925365 179.967675 263.948941 287.443509 179.967675 287.443509 511.388802
[155] 199.463434 363.925365 199.463434 199.463434 363.925365 391.418692 90.610952
[162] 511.388802 363.925365 721.334118 111.981793 449.404392 127.478683 337.431724
[169] 363.925365 252.576544 391.418692 419.911702 449.404392 647.353683 647.353683
[176] 543.880518 479.896759 543.880518 363.925365 391.418692 449.404392 241.454071
[183] 479.896759 449.404392 363.925365 449.404392 479.896759 611.862961 449.404392
[190] 363.925365 97.484633 391.418692 241.454071 199.463434 241.454071 161.471628
[197] 391.418692 337.431724 391.418692 97.484633 449.404392 71.489517 199.463434
[204] 449.404392 71.489517 511.388802 161.471628 511.388802 647.353683 71.489517
[211] 161.471628 391.418692 391.418692 511.388802 511.388802 449.404392 511.388802
[218] 577.371905 647.353683 391.418692 511.388802 647.353683 161.471628 391.418692
[225] 511.388802 449.404392 577.371905 71.489517 449.404392 97.484633 97.484633
[232] 287.443509 577.371905 287.443509 511.388802 161.471628 391.418692 241.454071
[239] 683.844069 611.862961 560.501253 683.844069 511.388802 511.388802 647.353683
[246] 449.404392 511.388802 97.484633 647.353683 683.844069 543.880518 449.404392
[253] 611.862961 311.937771 479.896759 611.862961 683.844069 311.937771 179.967675
[260] 647.353683 721.334118 263.948941 683.844069 419.911702 503.422072 577.371905
[267] 391.418692 647.353683 647.353683 647.353683 560.501253 693.122863 363.925365
[274] 443.425879 721.334118 721.334118 419.911702 611.862961 702.464136 721.334118
[281] 527.509701 611.862961 711.867887 311.937771 464.525616 511.388802 503.422072
[288] 391.418692 511.388802 97.484633 241.454071 577.371905 337.431724 241.454071
[295] 127.478683 577.371905 127.478683 511.388802 241.454071 721.334118 391.418692
[302] 391.418692 199.463434 391.418692 17.498279 449.404392 391.418692 199.463434
[309] 391.418692 241.454071 241.454071 287.443509 511.388802 449.404392 199.463434
[316] 241.454071 97.484633 337.431724 199.463434 577.371905 449.404392 449.404392
[323] 31.496281 577.371905 391.418692 287.443509 449.404392 391.418692 161.471628
[330] 577.371905 127.478683 721.334118 449.404392 161.471628 337.431724 577.371905
[337] 577.371905 127.478683 577.371905 391.418692 161.471628 161.471628 199.463434
[344] 31.496281 241.454071 31.496281 7.499463 511.388802 577.371905 577.371905
[351] 161.471628 49.493382 391.418692 479.896759 337.431724 391.418692 683.844069
[358] 391.418692 59.991574 449.404392 479.896759 721.334118 721.334118 577.371905
[365] 391.418692 391.418692 241.454071 577.371905 449.404392 199.463434 199.463434
[372] 391.418692 577.371905 577.371905 721.334118 577.371905 577.371905 577.371905
[379] 161.471628 127.478683 647.353683 721.334118 199.463434 241.454071 127.478683
[386] 161.471628 391.418692 449.404392 49.493382 449.404392 511.388802 391.418692
[393] 127.478683 241.454071 449.404392 577.371905 577.371905 287.443509 337.431724
[400] 449.404392 1.499953 287.443509 449.404392 511.388802 161.471628 97.484633
[407] 577.371905 17.498279 449.404392 391.418692 337.431724 577.371905 511.388802
[414] 287.443509 337.431724 287.443509 287.443509 647.353683 337.431724 391.418692
[421] 287.443509 337.431724 337.431724 391.418692 1.499953 577.371905 71.489517
[428] 647.353683 241.454071
attr(,"lambda")
[1] 1.999927
# (,"lambda") = 1.999927 (basically square)
#https://rpubs.com/frasermyers/627589
#PFG
# Control-group (PFG) subset.
Imp1stUG_012325_p <- Imp1stUG_012325 |>
dplyr::filter(Group == 'PFG')
#view(Imp1stUG_012325_p)
#IFG
# Intervention-group (IFG) subset.
Imp1stUG_012325_i <- Imp1stUG_012325 |>
dplyr::filter(Group == 'IFG')
#view(Imp1stUG_012325_i)
# Per-group histograms of the candidate transforms, kept for reference.
# #histogram: PFG
# hist(Imp1stUG_012325_p$score)
# #better
# hist(Imp1stUG_012325_p$score_sqr)
# #worse
# hist(Imp1stUG_012325_p$score_cube)
# hist(Imp1stUG_012325_p$score_bx_scale)
#
#
# #histogram: IFG
# hist(Imp1stUG_012325_i$score)
# hist(Imp1stUG_012325_i$score_sqr)
# #worse
# hist(Imp1stUG_012325_i$score_cube)
# hist(Imp1stUG_012325_i$score_bx_scale)
### LMER1 ####
#group fixed effect, predicting score_bx_scale
#class random effect
# Linear mixed model: Box-Cox-scaled score by Group, random intercepts for
# course Section. lmer resolves to lmerTest::lmer (loaded last), which
# supplies Satterthwaite p-values — see the lmerModLmerTest output below.
M1 <- lmer(score_bx_scale ~ Group +
(1 | Section),
Imp1stUG_012325)
summary(M1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [lmerModLmerTest
]
Formula: score_bx_scale ~ Group + (1 | Section)
Data: Imp1stUG_012325
REML criterion at convergence: 1172.8
Scaled residuals:
Min 1Q Median 3Q Max
-2.53078 -0.77398 0.09862 0.72540 2.37599
Random effects:
Groups Name Variance Std.Dev.
Section (Intercept) 0.1669 0.4086
Residual 0.8291 0.9105
Number of obs: 429, groups: Section, 21
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.09527 0.13978 19.83411 0.682 0.503
GroupPFG -0.23955 0.20091 19.38070 -1.192 0.248
Correlation of Fixed Effects:
(Intr)
GroupPFG -0.696
#R2
# Marginal (fixed) and conditional (fixed + random) R2 for M1.
r.squaredGLMM(M1)
R2m R2c
[1,] 0.01423152 0.1794556
#gives nice representation of output
# HTML regression table for M1 (sjPlot).
tab_model(M1)
#Effect size
# Partial eta-squared for the Group term (Type III ANOVA, output below).
effectsize::eta_squared(M1)
# Effect Size for ANOVA (Type III)
Parameter | Eta2 (partial) | 95% CI
-----------------------------------------
Group | 0.07 | [0.00, 1.00]
- One-sided CIs: upper bound fixed at [1.00].